### PANEL DATA ###


# Placeholder path: replace the string with the directory that holds the panel
# files before running the script.
setwd("set working directory")

library(tidyverse)
library(haven)

## Load the raw wave files ##
# The readers are namespace-qualified because library(tidyverse) does not
# attach haven, so bare read_sav()/read_dta() calls fail unless haven has been
# loaded separately. The path strings are placeholders.

## Load Germany panels
de_w1 <- haven::read_sav("load germany panel wave 1")
de_w2 <- haven::read_dta("load germany panel wave 2")
de_w3 <- haven::read_sav("load germany panel wave 3")
de_w4 <- haven::read_sav("load germany panel wave 4")
de_w5 <- haven::read_sav("load germany panel wave 5")

## Load France panels
fr_w1 <- haven::read_sav("load France panel wave 1")
fr_w2 <- haven::read_sav("load France panel wave 2")
fr_w3 <- haven::read_dta("load France panel wave 3")
fr_w4 <- haven::read_sav("load France panel wave 4")
fr_w5 <- haven::read_sav("load France panel wave 5")

## Load Great Britain panels
uk_w1 <- haven::read_dta("load Great Britain panel wave 1")
uk_w2 <- haven::read_dta("load Great Britain panel wave 2")
uk_w3 <- haven::read_dta("load Great Britain panel wave 3")
uk_w4 <- haven::read_sav("load Great Britain panel wave 4")
uk_w5 <- haven::read_sav("load Great Britain panel wave 5")

## Load United States panels
us_w1 <- haven::read_sav("load United States panel wave 1")
us_w2 <- haven::read_sav("load United States panel wave 2")
us_w3 <- haven::read_sav("load United States panel wave 3")
us_w4 <- haven::read_sav("load United States panel wave 4")
us_w5 <- haven::read_sav("load United States panel wave 5")



## Keep the GUNQID-to-cos1 column span in every wave ##
# Each wave keeps the contiguous run of columns from GUNQID through that wave's
# cos1_w<k>_4 item; columns outside that span are dropped.

### GB ###
uk_w1 <- dplyr::select(uk_w1, GUNQID:cos1_w1_4)
uk_w2 <- dplyr::select(uk_w2, GUNQID:cos1_w2_4)
uk_w3 <- dplyr::select(uk_w3, GUNQID:cos1_w3_4)
uk_w4 <- dplyr::select(uk_w4, GUNQID:cos1_w4_4)
uk_w5 <- dplyr::select(uk_w5, GUNQID:cos1_w5_4)

### US ###
us_w1 <- dplyr::select(us_w1, GUNQID:cos1_w1_4)
us_w2 <- dplyr::select(us_w2, GUNQID:cos1_w2_4)
us_w3 <- dplyr::select(us_w3, GUNQID:cos1_w3_4)
us_w4 <- dplyr::select(us_w4, GUNQID:cos1_w4_4)
us_w5 <- dplyr::select(us_w5, GUNQID:cos1_w5_4)

### DE ###
de_w1 <- dplyr::select(de_w1, GUNQID:cos1_w1_4)
de_w2 <- dplyr::select(de_w2, GUNQID:cos1_w2_4)
de_w3 <- dplyr::select(de_w3, GUNQID:cos1_w3_4)
de_w4 <- dplyr::select(de_w4, GUNQID:cos1_w4_4)
de_w5 <- dplyr::select(de_w5, GUNQID:cos1_w5_4)

### FR ###
fr_w1 <- dplyr::select(fr_w1, GUNQID:cos1_w1_4)
fr_w2 <- dplyr::select(fr_w2, GUNQID:cos1_w2_4)
fr_w3 <- dplyr::select(fr_w3, GUNQID:cos1_w3_4)
fr_w4 <- dplyr::select(fr_w4, GUNQID:cos1_w4_4)
fr_w5 <- dplyr::select(fr_w5, GUNQID:cos1_w5_4)




library(dplyr)

## Strip the wave suffix from column names (GB, US, DE) ##
# The previous pattern "_w1*" is a regular expression meaning "_w" followed by
# zero or more "1"s, so it also removed a bare "_w" from unrelated names.
# Matching the literal "_w<k>" with fixed = TRUE removes only the wave marker.
# Only the first occurrence per name is replaced (sub, not gsub), and the first
# column is excluded via -1. France is renamed in its own cleaning section.

### GB ###

uk_w1 <- uk_w1 %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), -1)

uk_w2 <- uk_w2 %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), -1)

uk_w3 <- uk_w3 %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), -1)

uk_w4 <- uk_w4 %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), -1)

uk_w5 <- uk_w5 %>%
  rename_with(~ sub("_w5", "", .x, fixed = TRUE), -1)


### US ###

us_w1 <- us_w1 %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), -1)

us_w2 <- us_w2 %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), -1)

us_w3 <- us_w3 %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), -1)

us_w4 <- us_w4 %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), -1)

us_w5 <- us_w5 %>%
  rename_with(~ sub("_w5", "", .x, fixed = TRUE), -1)


### DE ###

de_w1 <- de_w1 %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), -1)

de_w2 <- de_w2 %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), -1)

de_w3 <- de_w3 %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), -1)

de_w4 <- de_w4 %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), -1)

de_w5 <- de_w5 %>%
  rename_with(~ sub("_w5", "", .x, fixed = TRUE), -1)



###  CLEANING GB ###

# Drop the ID / Done* / extra-weight / *split columns listed in each call.
# NOTE(review): uk_w5 is not cleaned or harmonised in this section -- confirm
# whether that is intentional.
uk_w1 <- dplyr::select(uk_w1, -c(Wave1ID:WaveXID))
uk_w1[["PanelW1ID"]] <- NULL

uk_w2 <- dplyr::select(
  uk_w2,
  -c(DoneW1:DoneW12, WeightPanelW12, taxsplit:selected)
)

names(uk_w3)
uk_w3 <- dplyr::select(
  uk_w3,
  -c(
    DoneW1:DoneW123, WeightPanelW1, WeightPanelW12, WeightPanelW123,
    afghanistansplit:vulnsplit
  )
)

names(uk_w4)
uk_w4 <- dplyr::select(
  uk_w4,
  -c(
    DoneW1:DoneW1234, WeightPanelW1:WeightPanelW3, WeightPanelW12,
    WeightPanelW123, CoLSplit:EAsplit
  )
)

# install.packages("janitor")
library(janitor)
compare_df_cols(uk_w1, uk_w2, uk_w3, uk_w4)

# Inspect the household-income columns before they are renamed.
uk_w3$gross_household
uk_w3$gross_household
uk_w4$profile_gross_household

## Harmonise column names across the four GB waves ##
# Renames keep a column in place; columns a wave lacks are appended as
# all-NA. The order inside each mutate() fixes the order of appended columns.

# Wave 1: carries new_socgrade, pastvote_2017 and voted2017 natively.
uk_w1 <- uk_w1 %>%
  mutate(
    education_gb = NA,
    ethnicity_new = NA,
    household_income_gb = NA,
    socialgrade_cie_w1 = new_socgrade,
    new_socgrade = NULL,
    socialgrade_cie_w2 = NA,
    socialgrade_cie_w34 = NA,
    pastvote_ge_2019 = NA,
    religion_uk = NA,
    respdate = NA,
    voted_ge_2019 = NA
  )

# Wave 2
uk_w2 <- uk_w2 %>%
  rename(
    education_gb = education_level,
    household_income_gb = gross_household,
    religion_uk = religion
  ) %>%
  mutate(
    socialgrade_cie_w2 = socialgrade_cie,
    socialgrade_cie = NULL,
    socialgrade_cie_w1 = NA,
    socialgrade_cie_w34 = NA,
    pastvote_2017 = NA,
    respdate = NA,
    voted2017 = NA
  )

# Wave 3
uk_w3 <- uk_w3 %>%
  rename(
    education_gb = education_level,
    household_income_gb = gross_household,
    religion_uk = religion
  ) %>%
  mutate(
    socialgrade_cie_w34 = profile_socialgrade_cie,
    profile_socialgrade_cie = NULL,
    socialgrade_cie_w1 = NA,
    socialgrade_cie_w2 = NA,
    pastvote_2017 = NA,
    respdate = NA,
    voted2017 = NA
  )

# Wave 4: uses profile_-prefixed names for several demographics.
uk_w4 <- uk_w4 %>%
  rename(
    education_gb = profile_education_level,
    household_income_gb = profile_gross_household,
    gender = profile_gender,
    region_GOR = profile_GOR,
    religion_uk = profile_religion
  ) %>%
  mutate(
    socialgrade_cie_w34 = profile_socialgrade_cie,
    profile_socialgrade_cie = NULL,
    socialgrade_cie_w1 = NA,
    socialgrade_cie_w2 = NA,
    pastvote_2017 = NA,
    voted2017 = NA
  )

## Weights: one WeightPanel column per wave ##
compare_df_cols(uk_w1, uk_w2, uk_w3, uk_w4)

uk_w1 <- rename(uk_w1, WeightPanel = WeightPanelW1)
uk_w2 <- rename(uk_w2, WeightPanel = WeightPanelW2)
uk_w3 <- rename(uk_w3, WeightPanel = WeightPanelW3)
uk_w3[["WeightPanelW2"]] <- NULL
uk_w4 <- rename(uk_w4, WeightPanel = WeightPanelW4)

compare_df_cols(uk_w1, uk_w2, uk_w3, uk_w4)


ncol(uk_w1) # 171
ncol(uk_w2) # 171
ncol(uk_w3) # 171
ncol(uk_w4) # 171
compare_df_cols(uk_w1, uk_w2, uk_w3, uk_w4)

compare_df_cols(uk_w1, uk_w2, uk_w3, uk_w4, return = "mismatch")

library(labelled)

## Stack the GB waves ##
# Labels are removed before stacking (presumably so differently labelled waves
# bind cleanly -- confirm); each wave is tagged with its wave and country.
# NOTE(review): uk_w5 is not part of this stack -- confirm that is intended.
uk_w1_not <- remove_labels(uk_w1) %>%
  mutate(wave = "WAVE 1", country = "GB")

uk_w2_not <- remove_labels(uk_w2) %>%
  mutate(wave = "WAVE 2", country = "GB")

uk_w3_not <- remove_labels(uk_w3) %>%
  mutate(wave = "WAVE 3", country = "GB")

uk_w4_not <- remove_labels(uk_w4) %>%
  mutate(wave = "WAVE 4", country = "GB")

all_uk <- rbind(uk_w1_not, uk_w2_not, uk_w3_not, uk_w4_not)

# Rows per respondent ID. The join key is stated explicitly instead of relying
# on whichever column names the two tables happen to share.
data_count_2 <- all_uk %>%
  count(GUNQID)

# Keep respondents that appear in at least two waves.
all_uk <- all_uk %>%
  left_join(data_count_2, by = "GUNQID") %>%
  filter(n >= 2)


df_names_uk <- as.data.frame(colnames(all_uk))


### check the coding levels are the same for all 



## CLEANING FRANCE ##

# Drop the ID / Done* / extra-weight / *split columns listed in each call.
# NOTE(review): fr_w5 gets no equivalent clean-up in this section -- confirm
# whether wave 5 needs one before it is stacked with the other waves.
fr_w1 <- fr_w1 %>%
  dplyr::select(-c(PanelW1ID:WaveXID, groupsplit))

names(fr_w2)

fr_w2 <- fr_w2 %>%
  dplyr::select(-c(DoneW12:DoneW2, WeightPanelW12))

names(fr_w3)
fr_w3 <- fr_w3 %>%
  dplyr::select(-c(
    DoneW1:DoneW123, WeightPanelW1, WeightPanelW2, WeightPanelW12,
    WeightPanelW123, afghanistansplit:afghsplit
  ))

names(fr_w4)
fr_w4 <- fr_w4 %>%
  dplyr::select(-c(DoneW1:WeightPanelW3, CoLSplit:UKRsplit))


## Strip the wave suffix from column names ##
# "_w1*" was a regex ("_w" plus zero or more "1"s) that also removed a bare
# "_w" from unrelated names; the literal suffix is matched instead. The first
# column is excluded via -1.
fr_w1 <- fr_w1 %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), -1)

fr_w2 <- fr_w2 %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), -1)

fr_w3 <- fr_w3 %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), -1)

fr_w4 <- fr_w4 %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), -1)

fr_w5 <- fr_w5 %>%
  rename_with(~ sub("_w5", "", .x, fixed = TRUE), -1)

compare_df_cols(fr_w1, fr_w2, fr_w3, fr_w4, fr_w5)


## education, Education, education_recode ##

fr_w1$Education
fr_w2$education
fr_w3$education_recode
fr_w4$education_recode
fr_w5$education_recode

# NOTE(review): fr_w5$education_recode is inspected above but never renamed,
# while rbind() of the five waves needs matching column names -- confirm how
# wave 5 should be mapped.
fr_w1 <- fr_w1 %>%
  rename(education_fr_w1 = Education)

fr_w2 <- fr_w2 %>%
  rename(education_fr_w24 = education)

fr_w3 <- fr_w3 %>%
  rename(education_fr_w24 = education_recode) %>%
  mutate(education_fr_w1 = NA)

fr_w4 <- fr_w4 %>%
  rename(education_fr_w24 = education_recode) %>%
  mutate(education_fr_w1 = NA)

compare_df_cols(fr_w1, fr_w2, fr_w3, fr_w4, fr_w5)

## 2022 presidential vote items, pb4_politicalscale, ppol_partyid, track7b ##
# Columns a wave lacks are appended as all-NA. Each column is assigned once
# (the round-1 past-vote padding used to be repeated).

fr_w1 <- fr_w1 %>%
  mutate(
    fr_pastvote_presidential22_round1 = NA,
    fr_voted_presidential22_round1 = NA,
    fr_voted_presidential22_round2 = NA,
    fr_pastvote_presidential22_round2 = NA,
    pb4_politicalscale = NA,
    ppol_partyid = NA
  )

fr_w2 <- fr_w2 %>%
  mutate(
    fr_pastvote_presidential22_round1 = NA,
    fr_voted_presidential22_round1 = NA,
    fr_voted_presidential22_round2 = NA,
    fr_pastvote_presidential22_round2 = NA,
    track7b = NA
  )

fr_w3 <- fr_w3 %>%
  mutate(
    fr_pastvote_presidential22_round1 = NA,
    fr_voted_presidential22_round1 = NA,
    fr_voted_presidential22_round2 = NA,
    fr_pastvote_presidential22_round2 = NA,
    track7b = NA
  )

fr_w4$track7b <- NA


## Weights: one WeightPanel column per wave ##

fr_w1 <- fr_w1 %>%
  rename(WeightPanel = WeightPanelW1)

fr_w2 <- fr_w2 %>%
  rename(WeightPanel = WeightPanelW2)

fr_w3 <- fr_w3 %>%
  rename(WeightPanel = WeightPanelW3)

fr_w4 <- fr_w4 %>%
  rename(WeightPanel = WeightPanelW4)

fr_w5 <- fr_w5 %>%
  rename(WeightPanel = WeightPanelW5)

compare_df_cols(fr_w1, fr_w2, fr_w3, fr_w4, fr_w5)

ncol(fr_w1) # 169
ncol(fr_w2) # 169
ncol(fr_w3) # 169
ncol(fr_w4) # 169
ncol(fr_w5) # CHECK

# Waves 1 and 2 each carry only one of the two education codings.
fr_w1$education_fr_w24 <- NA
fr_w2$education_fr_w1 <- NA
## Stack the French waves ##

library(labelled)

# Labels are removed before stacking; each wave is tagged with its wave and
# country. Wave 5 is stored in its own object: it used to be assigned to
# fr_w4_not, which overwrote wave 4 and left fr_w5_not undefined.
fr_w1_not <- remove_labels(fr_w1) %>%
  mutate(wave = "WAVE 1", country = "FR")

fr_w2_not <- remove_labels(fr_w2) %>%
  mutate(wave = "WAVE 2", country = "FR")

fr_w3_not <- remove_labels(fr_w3) %>%
  mutate(wave = "WAVE 3", country = "FR")

fr_w4_not <- remove_labels(fr_w4) %>%
  mutate(wave = "WAVE 4", country = "FR")

fr_w5_not <- remove_labels(fr_w5) %>%
  mutate(wave = "WAVE 5", country = "FR")


all_fr <- rbind(fr_w1_not, fr_w2_not, fr_w3_not, fr_w4_not, fr_w5_not)

all_fr <- all_fr %>%
  rename(household_income_fr = income)

# Rows per respondent ID, joined back on an explicit key.
data_count_2 <- all_fr %>%
  count(GUNQID)

# Keep respondents that appear in at least two waves.
all_fr <- all_fr %>%
  left_join(data_count_2, by = "GUNQID") %>%
  filter(n >= 2)

df_names_fr <- as.data.frame(colnames(all_fr))



### CLEANING GERMANY ##

# Drop the ID / Done* / extra-weight / *split columns listed in each call.
de_w1 <- de_w1 %>%
  dplyr::select(-c(PanelW1ID:WaveXID, randomsplit))

de_w2 <- de_w2 %>%
  dplyr::select(-c(
    DoneW1:DoneW12, WeightPanelW12, selected:covidimpactareatext
  ))

de_w3 <- de_w3 %>%
  dplyr::select(-c(
    DoneW1:DoneW123, WeightPanelW1, WeightPanelW2, WeightPanelW12,
    WeightPanelW123, afghanistansplit:tradeoffsplit
  ))

de_w4 <- de_w4 %>%
  dplyr::select(-c(DoneW1:WeightPanelW3, CoLSplit:treatsplit))

## ADD de_w5
# NOTE(review): de_w5 has no column clean-up yet -- confirm before stacking.

ncol(de_w1) # 167
ncol(de_w2) # 168
ncol(de_w3) # 179
ncol(de_w4) # 170
ncol(de_w5) # CHECK

## Strip the wave suffix from column names ##
# "_w1*" was a regex ("_w" plus zero or more "1"s) that also removed a bare
# "_w" from unrelated names; the literal suffix is matched instead. The first
# column is excluded via -1.
de_w1 <- de_w1 %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), -1)

de_w2 <- de_w2 %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), -1)

de_w3 <- de_w3 %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), -1)

de_w4 <- de_w4 %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), -1)

de_w5 <- de_w5 %>%
  rename_with(~ sub("_w5", "", .x, fixed = TRUE), -1)

compare_df_cols(de_w1, de_w2, de_w3, de_w4, de_w5)

## education ##
de_w1 <- de_w1 %>%
  rename(education_de = educ_neu)

de_w2 <- de_w2 %>%
  rename(education_de = educ_neu)

de_w3 <- de_w3 %>%
  rename(education_de = educ_neu)

de_w4 <- de_w4 %>%
  rename(education_de = educ_neu)

de_w5 <- de_w5 %>%
  rename(education_de = educ_neu)


## BTW21_Quote_refdk ##
# Appended as all-NA for waves 1-3; assigned once (it used to be repeated
# further down with the same value).

de_w1$BTW21_Quote_refdk <- NA
de_w2$BTW21_Quote_refdk <- NA
de_w3$BTW21_Quote_refdk <- NA


## effects_1 ##

de_w3 <- de_w3 %>%
  dplyr::select(-c(effects_1:effects_6))


## hinc ##

de_w1$hinc <- NULL
de_w1$household_income
de_w2$hinc
de_w3$hinc
de_w4$hinc
de_w5$hinc


de_w1 <- de_w1 %>%
  rename(household_income_de = household_income)

de_w2 <- de_w2 %>%
  rename(household_income_de = hinc)

de_w3 <- de_w3 %>%
  rename(household_income_de = hinc)

de_w4 <- de_w4 %>%
  rename(household_income_de = hinc)

de_w5 <- de_w5 %>%
  rename(household_income_de = hinc)

## impact ##
de_w3$impactgrid <- NULL
de_w3$impact1 <- NULL
de_w3$impact2 <- NULL
de_w3$impact3 <- NULL
de_w3$impact4 <- NULL


## nielsenregion region_nielsen ##

de_w1 <- de_w1 %>%
  rename(nielsenregion = region_nielsen)

## reli ##
# Wave 1's religion column is dropped and replaced by an all-NA religion_de.

de_w1$religion <- NULL
de_w1$religion_de <- NA

de_w2 <- de_w2 %>%
  rename(religion_de = reli)

de_w3 <- de_w3 %>%
  rename(religion_de = reli)

de_w4 <- de_w4 %>%
  rename(religion_de = reli)

de_w5 <- de_w5 %>%
  rename(religion_de = reli)


## stao ##
de_w1$stao <- NA
de_w2$stao <- NA
de_w3$stao <- NA

## WEIGHTS ##

de_w1 <- de_w1 %>%
  rename(WeightPanel = WeightPanelW1)

de_w2 <- de_w2 %>%
  rename(WeightPanel = WeightPanelW2)

de_w3 <- de_w3 %>%
  rename(WeightPanel = WeightPanelW3)

de_w4 <- de_w4 %>%
  rename(WeightPanel = WeightPanelW4)

de_w5 <- de_w5 %>%
  rename(WeightPanel = WeightPanelW5)

compare_df_cols(de_w1, de_w2, de_w3, de_w4, de_w5)

## Stack the German waves ##
# Labels are removed before stacking; each wave is tagged with its wave and
# country.
de_w1_not <- remove_labels(de_w1) %>%
  mutate(wave = "WAVE 1", country = "DE")

de_w2_not <- remove_labels(de_w2) %>%
  mutate(wave = "WAVE 2", country = "DE")

de_w3_not <- remove_labels(de_w3) %>%
  mutate(wave = "WAVE 3", country = "DE")

de_w4_not <- remove_labels(de_w4) %>%
  mutate(wave = "WAVE 4", country = "DE")

de_w5_not <- remove_labels(de_w5) %>%
  mutate(wave = "WAVE 5", country = "DE")

all_de <- rbind(de_w1_not, de_w2_not, de_w3_not, de_w4_not, de_w5_not)

# Rows per respondent ID. The join key is stated explicitly instead of relying
# on whichever column names the two tables happen to share.
data_count_2 <- all_de %>%
  count(GUNQID)

# Keep respondents that appear in at least two waves.
all_de <- all_de %>%
  left_join(data_count_2, by = "GUNQID") %>%
  filter(n >= 2)

df_names_de <- as.data.frame(colnames(all_de))


### UNITED STATES CLEANING ##

# Drop the ID / Done* / extra-weight / *split columns listed in each call.
us_w1 <- us_w1 %>%
  dplyr::select(-c(PanelW1ID:WaveXID))

us_w2 <- us_w2 %>%
  dplyr::select(-c(DoneW1:DoneW12, WeightPanelW12, taxsplit:selected))

us_w3 <- us_w3 %>%
  dplyr::select(-c(
    DoneW1:DoneW123, WeightPanelW1, WeightPanelW2, WeightPanelW12:vulnsplit
  ))

us_w4 <- us_w4 %>%
  dplyr::select(-c(DoneW1:WeightPanelW3, WeightPanelW12:EAsplit))

## add wave 5
# NOTE(review): us_w5 has no column clean-up yet, and the padding below only
# covers waves 1-4 -- confirm wave 5 already has the harmonised columns.

ncol(us_w1) # 162
ncol(us_w2) # 166
ncol(us_w3) # 167
ncol(us_w4) # 168
ncol(us_w5) # Add wave 5

## Strip the wave suffix from column names ##
# "_w1*" was a regex ("_w" plus zero or more "1"s) that also removed a bare
# "_w" from unrelated names; the literal suffix is matched instead. The first
# column is excluded via -1.
us_w1 <- us_w1 %>%
  rename_with(~ sub("_w1", "", .x, fixed = TRUE), -1)

us_w2 <- us_w2 %>%
  rename_with(~ sub("_w2", "", .x, fixed = TRUE), -1)

us_w3 <- us_w3 %>%
  rename_with(~ sub("_w3", "", .x, fixed = TRUE), -1)

us_w4 <- us_w4 %>%
  rename_with(~ sub("_w4", "", .x, fixed = TRUE), -1)

us_w5 <- us_w5 %>%
  rename_with(~ sub("_w5", "", .x, fixed = TRUE), -1)

compare_df_cols(us_w1, us_w2, us_w3, us_w4, us_w5)

## division ##

us_w1$division <- NA

## ideo5 ##

us_w1$ideo5 <- NA


## inputstate ##
us_w1$inputstate <- NA
us_w2$inputstate <- NA


## pid3 ##
us_w1$pid3 <- NA


# presvote16post, presvote16postx #

us_w2 <- us_w2 %>%
  rename(presvote16post = presvote16postx)


us_w3$presvote16post <- NA
us_w4$presvote16post <- NA

## presvote20post ##

us_w1$presvote20post <- NA
us_w2$presvote20post <- NA

## race_xbreak  race ##

us_w2$race <- NA
us_w1$race_xbreak <- NA


## religpew ##

us_w2 <- us_w2 %>%
  rename(religion_us = religpew)

us_w4 <- us_w4 %>%
  rename(religion_us = religpew)

us_w1$religion_us <- NA
us_w3$religion_us <- NA

## WEIGHTS ##

us_w1 <- us_w1 %>%
  rename(WeightPanel = WeightPanelW1)

us_w2 <- us_w2 %>%
  rename(WeightPanel = WeightPanelW2)

us_w3 <- us_w3 %>%
  rename(WeightPanel = WeightPanelW3)

us_w4 <- us_w4 %>%
  rename(WeightPanel = WeightPanelW4)

us_w5 <- us_w5 %>%
  rename(WeightPanel = WeightPanelW5)

##  household income ##

# Final schema check across all five waves (the last argument used to be the
# undefined name us_wa5, which made this call fail).
compare_df_cols(us_w1, us_w2, us_w3, us_w4, us_w5)

## Stack the US waves ##
# Labels are removed before stacking; each wave is tagged with its wave and
# country.
us_w1_not <- remove_labels(us_w1) %>%
  mutate(wave = "WAVE 1", country = "US")

us_w2_not <- remove_labels(us_w2) %>%
  mutate(wave = "WAVE 2", country = "US")

us_w3_not <- remove_labels(us_w3) %>%
  mutate(wave = "WAVE 3", country = "US")

us_w4_not <- remove_labels(us_w4) %>%
  mutate(wave = "WAVE 4", country = "US")

us_w5_not <- remove_labels(us_w5) %>%
  mutate(wave = "WAVE 5", country = "US")

all_us <- rbind(us_w1_not, us_w2_not, us_w3_not, us_w4_not, us_w5_not)

all_us <- all_us %>%
  rename(education_us = educ)

# Rows per respondent ID. The join key is stated explicitly instead of relying
# on whichever column names the two tables happen to share.
data_count_2 <- all_us %>%
  count(GUNQID)

# Keep respondents that appear in at least two waves.
all_us <- all_us %>%
  left_join(data_count_2, by = "GUNQID") %>%
  filter(n >= 2)

# Inspect the income column before it is given its country-specific name.
all_us$profile_gross_household

all_us <- all_us %>%
  rename(household_income_us = profile_gross_household)

df_names_us <- as.data.frame(colnames(all_us))

## Pad the four country tables to a common set of columns ##

compare_df_cols(all_uk, all_fr, all_de, all_us)

# Set each named column to all-NA. A column that already exists keeps its
# position; a new one is appended, in the order given in `cols`.
add_na_columns <- function(df, cols) {
  for (col in cols) {
    df[[col]] <- NA
  }
  df
}

fr_pres22_cols <- c(
  "fr_pastvote_presidential22_round1", "fr_pastvote_presidential22_round2",
  "fr_voted_presidential22_round1", "fr_voted_presidential22_round2"
)
track2_item_cols <- paste0("track2_", 1:8)

# The vectors below list, per country table, the columns it is given as NA.
# Their order is the order in which the columns are appended.

all_uk <- add_na_columns(all_uk, c(
  "BTW_17_Quote_refdk", "BTW21_Quote_refdk", "division",
  "education_us", "education_fr_w1", "education_fr_w24", "education_de",
  fr_pres22_cols,
  "Grouped_Region_2014", "ideo5", "inputstate", "NEW_regions_2015",
  "nielsenregion", "pb4_politicalscale", "pid3", "ppol_partyid",
  "Presidential_vote17", "Presidential_vote17_round2",
  "presvote16post", "presvote20post", "race", "race_xbreak", "region",
  "stao", "track7b",
  "household_income_de", "household_income_fr", "household_income_us",
  "partyid",
  track2_item_cols,
  "religion_fr", "religion_de", "religion_us"
))

all_fr <- add_na_columns(all_fr, c(
  "BTW_17_Quote_refdk", "BTW21_Quote_refdk", "division",
  "education_us", "education_gb", "education_de", "ethnicity_new",
  "ideo5", "inputstate", "nielsenregion",
  "pastvote_2017", "pastvote_EURef", "pastvote_ge_2019",
  "pid", "pid3", "pidother",
  "presvote16post", "presvote20post", "race", "race_xbreak", "region",
  "region_GOR", "respdate", "stao", "voted_ge_2019", "voted2017",
  "household_income_de", "household_income_us", "household_income_gb",
  "socialgrade_cie_w1", "socialgrade_cie_w2", "socialgrade_cie_w34",
  "sdg",
  track2_item_cols,
  "religion_uk", "religion_de", "religion_us", "religion_fr"
))

all_de <- add_na_columns(all_de, c(
  "division",
  "education_us", "education_gb", "education_fr_w1", "education_fr_w24",
  "ethnicity_new",
  fr_pres22_cols,
  "Grouped_Region_2014", "ideo5", "inputstate", "NEW_regions_2015",
  "pastvote_2017", "pastvote_EURef", "pastvote_ge_2019",
  "pb4_politicalscale", "pid3", "pidother", "ppol_partyid",
  "Presidential_vote17", "Presidential_vote17_round2",
  "presvote16post", "presvote20post", "race", "race_xbreak", "region",
  "region_GOR", "respdate", "track7b", "voted_ge_2019", "voted2017",
  "household_income_fr", "household_income_us", "household_income_gb",
  "partyid",
  "socialgrade_cie_w1", "socialgrade_cie_w2", "socialgrade_cie_w34",
  "track2",
  "religion_uk", "religion_fr", "religion_us"
))

all_us <- add_na_columns(all_us, c(
  "BTW_17_Quote_refdk", "BTW21_Quote_refdk",
  "education_gb", "education_fr_w1", "education_fr_w24", "education_de",
  "ethnicity_new",
  fr_pres22_cols,
  "Grouped_Region_2014", "NEW_regions_2015", "nielsenregion",
  "pastvote_2017", "pastvote_EURef", "pastvote_ge_2019",
  "pb4_politicalscale", "ppol_partyid",
  "Presidential_vote17", "Presidential_vote17_round2",
  "region_GOR", "respdate", "stao", "track7b",
  "voted_ge_2019", "voted2017",
  "household_income_de", "household_income_fr", "household_income_gb",
  "partyid",
  "socialgrade_cie_w1", "socialgrade_cie_w2", "socialgrade_cie_w34",
  track2_item_cols,
  "religion_uk", "religion_de", "religion_fr"
))

# NOTE(review): all_fr$religion_fr is filled with NA above, exactly as before;
# confirm whether a French religion variable should be mapped here instead.

## US household income ##
# The column was renamed to household_income_us when all_us was built, so the
# old name profile_gross_household no longer exists on this table.
table(all_us$household_income_us)

## track7 / track7a ##
# Only the French table keeps its track7a item, under the shared name track7;
# the other three tables must not carry a track7a column.
all_uk$track7a <- NULL
all_de$track7a <- NULL
all_us$track7a <- NULL

all_fr <- all_fr %>%
  rename(track7 = track7a)

names(all_uk)

# Raise the print limit first so the full comparison table is shown.
options(max.print = 2000)
compare_df_cols(all_uk, all_fr, all_de, all_us)

## Bind the four country tables and write the panel ##

panel <- rbind(all_uk, all_fr, all_de, all_us)

# All renames happen once, here. rename(don6_1 = DON6_1) used to be repeated
# three more times further down; after the first rename the column DON6_1 no
# longer exists, so each repeat stopped the script with an error.
panel <- panel %>%
  rename(
    don3_other = don3other,
    don4_other = don4other,
    don6_other = don6other,
    pid_other = pidother,
    stao_wo = stao,
    don6_1 = DON6_1
  )

haven::write_sav(panel, "panel.sav")


panel <- haven::read_sav("panel.sav")


## Variable list used to compare against the codebook ##
df_names <- as.data.frame(colnames(panel))

# NOTE(review): this write replaces variables.csv, yet the code below reads
# columns named `dataframe` and `codebook` that this write does not create.
# Presumably the file is edited by hand in between -- confirm, and guard the
# write if the edited file must be kept.
write.csv(df_names, "variables.csv")

variables <- readr::read_csv("variables.csv")

data.frame_order <- variables %>%
  dplyr::select(dataframe) %>%
  mutate(exist = 1) %>%
  rename(var_name = dataframe)

codebook_order <- variables %>%
  dplyr::select(codebook) %>%
  mutate(exist_codebook = 1) %>%
  rename(var_name = codebook)
codebook_order

paste(shQuote(codebook_order$var_name), collapse = ", ")

test <- codebook_order %>%
  left_join(data.frame_order, by = "var_name")

## Final column set and order ##
# Each name appears once; the earlier list repeated pastvote_EURef,
# Presidential_vote17 and Presidential_vote17_round2, which select() ignored.
panel_vars <- c(
  "GUNQID", "age", "gender", "region_GOR",
  "socialgrade_cie_w1", "socialgrade_cie_w2", "socialgrade_cie_w34",
  "voted_ge_2019", "pastvote_ge_2019", "pastvote_EURef", "education_gb",
  "religion_de", "wave", "country", "n", "household_income_gb",
  "ethnicity_new", "respdate", "WeightPanel", "voted2017", "pastvote_2017",
  "BTW21_Quote_refdk", "BTW_17_Quote_refdk",
  "nielsenregion", "education_de", "household_income_fr",
  "pb4_politicalscale", "ppol_partyid", "education_fr_w24",
  "household_income_de",
  "Grouped_Region_2014", "NEW_regions_2015",
  "Presidential_vote17", "Presidential_vote17_round2",
  "region", "division", "education_us",
  "religion_us", "religion_uk", "household_income_us",
  "race", "race_xbreak", "ideo5", "pid3", "inputstate",
  "presvote20post",
  "fr_voted_presidential22_round1", "fr_pastvote_presidential22_round1",
  "fr_voted_presidential22_round2", "fr_pastvote_presidential22_round2",
  "education_fr_w1", "partyid",
  paste0("track1_", 1:10),
  "track2", paste0("track2_", 1:8),
  "track3", "track4", "track5", "track6", "track7", "track7b",
  "track8", paste0("track8_", 1:5), "track9", "track10",
  "engage1", "engage1a", "engage1b", "engage1c", "engage2",
  "engage3", "engage3a", "engage3b",
  "engage4", "engage4a", "engage4b", "engage4c", "engage4d",
  "don1", "don2", "don3", "don3_other",
  paste0("don4_", 1:12), "don4_other",
  "don5",
  paste0("don6_", 1:12), "don6_other",
  paste0("emt_", 1:14),
  "purpose",
  paste0("area_", 1:15),
  "econ1", "econ2", "econ3",
  "ben1", paste0("ben1_", 1:4),
  "cost1", paste0("cost1_", 1:4), "cost5",
  "moral1", "moral1a", "moral1b", "moral1c", "moral1d",
  "norm1", "norm2", "norm3",
  "trust1", "trust2", "trust3",
  "eff1", "leftrt", "pid", "pid_other", "demsat",
  "dem1", "dem2", "dem3",
  "mig1", "mig2", paste0("mig2_", 1:5),
  "mig3", "mig4", paste0("mig4_", 1:4),
  "sdg", "dnut", "erad", "deserv",
  "cos1", paste0("cos1_", 1:4)
)

panel <- panel %>%
  dplyr::select(all_of(panel_vars))

haven::write_sav(panel, "panel.sav")








